---
title: "NEHA Execute"
output:
  html_document:
    toc: true
    toc_depth: 1
---

Article: "The Diffusion of State Firearm Regulations"
Author: Nicholas Hemauer


The working directory should be set to the root of the replication folder. No file names should need to be changed.

Tables 1 and 2 in the manuscript were handmade. Table 3, the figures, and models can be replicated with this script.

A "Figures" folder must be made in the replication folder root. When run, the figures should automatically be downloaded into the "Figures" folder.

```{r}

### Essentials & Data

set.seed(1337)

library(haven)
library(tidyverse)
library(summarytools)
library(patchwork)
library(ggplot2)
library(maps)
library(glmmTMB)
library(xtable)
library(texreg)
library(scales)
library(ggeffects)
library(igraph)

# Special case: Install 'neha' from GitHub
# Note: NEHA requires rJava. Therefore, the computer/HPC must have the Java Development Kit (JDK) installed
if (!requireNamespace("neha", quietly = TRUE)){
  if (!requireNamespace("devtools", quietly = TRUE)){
    install.packages("devtools")
  }
  devtools::install_github("desmarais-lab/neha")
  library(neha)
}

library(neha)

# Import Full Datasets
pres_2020_dataset <- read.csv("Presidential_Results_2020.csv")
cascade_long <- read.csv("Final_Long_Dataset.csv", row.names = 1)
cascade_long_scale <- read.csv("Final_Long_Scaled_Dataset.csv", row.names = 1)
cascade_long_scale_lag <- read.csv("Final_Long_Scaled_Dataset_lag.csv", row.names = 1)

# Remove Alaska and Hawaii from analysis (geographic neighbor diffusion estimation is inhibited)
cascade_long <- cascade_long %>% filter(State != "Alaska" & State != "Hawaii")
cascade_long_scale <- cascade_long_scale %>% filter(State != "Alaska" & State != "Hawaii")
cascade_long_scale_lag <- cascade_long_scale_lag %>% filter(State != "Alaska" & State != "Hawaii")

```

```{r}

### Descriptive Data Characteristics
## Runtime for section: 20 seconds

summarytab <- cascade_long[, c(1:2, 5:17)]

summarytab <- unique(summarytab)

colnames(summarytab) <- c("State", "Year", "Law Total", "Total Population", "White Proportion", "Mean Income", 
                          "Unemployment Rate", "Firearm Ownership", "Firearm Homicide Rate", "Firearm Suicide Rate",
                          "Citizen Ideology", "State Ideology", "Professionalism", "Female Legislator Prop.", 
                          "Neighbor Adopt Prop.")

cols <- colnames(summarytab[, 3:14])

print(descr(summarytab[, cols],
      stats = c("mean", "sd", "min", "max"), transpose = TRUE, order = "preserve", justify = "c"),
      file = "Figures/table3.html", 
      report.title = "Descriptive Summary Characteristics",
      footnote = "<b>Years:</b> 1991-2019<br/>")

### Create Combined Plot (Figure 2)
timegraph <- summarytab %>%
  group_by(Year) %>%
  summarise(
    Law_Total = sum(`Law Total`, na.rm = TRUE),
    Firearm_Homicides = sum(`Firearm Homicide Rate`, na.rm = TRUE)
  )

party_change_years <- c(1993, 2001, 2009, 2017)

law_based_groups <- summarytab %>%
  group_by(Year) %>%
  mutate(
    Law_Rank = ntile(`Law Total`, 10),
    Law_Group = case_when(
      Law_Rank == 10 ~ "Top 10%",
      Law_Rank == 1 ~ "Bottom 10%",
      Law_Rank == 5 ~ "Middle 10%",
      TRUE ~ NA_character_
    )
  )

law_trends <- law_based_groups %>%
  filter(!is.na(Law_Group)) %>%
  group_by(Year, Law_Group) %>%
  summarise(Law_Total = mean(`Law Total`, na.rm = TRUE), .groups = "drop")

homicide_trends <- law_based_groups %>%
  filter(!is.na(Law_Group)) %>%
  group_by(Year, Law_Group) %>%
  summarise(Firearm_Homicides = mean(`Firearm Homicide Rate`, na.rm = TRUE), .groups = "drop")

suicide_trends <- law_based_groups %>%
  filter(!is.na(Law_Group)) %>%
  group_by(Year, Law_Group) %>%
  summarise(Firearm_Suicides = mean(`Firearm Suicide Rate`, na.rm = TRUE), .groups = "drop")

law_trends$Law_Group <- factor(law_trends$Law_Group, levels = c("Top 10%", "Middle 10%", "Bottom 10%"))
homicide_trends$Law_Group <- factor(homicide_trends$Law_Group, levels = c("Top 10%", "Middle 10%", "Bottom 10%"))
suicide_trends$Law_Group <- factor(suicide_trends$Law_Group, levels = c("Top 10%", "Middle 10%", "Bottom 10%"))

law_y_min <- round(min(law_trends$Law_Total, na.rm = TRUE))
law_y_max <- round(max(law_trends$Law_Total, na.rm = TRUE))
homicide_y_min <- round(min(homicide_trends$Firearm_Homicides, na.rm = TRUE))
homicide_y_max <- round(max(homicide_trends$Firearm_Homicides, na.rm = TRUE))
suicide_y_min <- round(min(suicide_trends$Firearm_Suicides, na.rm = TRUE))
suicide_y_max <- round(max(suicide_trends$Firearm_Suicides, na.rm = TRUE))

# Firearm Law Trend Plot
p1 <- ggplot(law_trends, aes(x = Year, y = Law_Total, group = Law_Group)) +
  geom_line(aes(linetype = Law_Group), color = "black", size = 1) +
  geom_point(aes(shape = Law_Group), color = "black", size = 2) +
  geom_vline(xintercept = party_change_years, linetype = "dashed", color = "black", size = 0.5) +
  theme_minimal() +
  labs(x = "Year", y = "Firearm Law Count", linetype = NULL, shape = NULL) +
  scale_x_continuous(breaks = c(1991, seq(1995, 2015, by = 5), 2019)) +
  scale_y_continuous(breaks = c(law_y_min, seq(20, 80, by = 20), law_y_max)) +   
  theme(legend.position = "none")

# Firearm Homicide Trend Plot
p2 <- ggplot(homicide_trends, aes(x = Year, y = Firearm_Homicides, group = Law_Group)) +
  geom_line(aes(linetype = Law_Group), color = "black", size = 1) +
  geom_point(aes(shape = Law_Group), color = "black", size = 2) +
  geom_vline(xintercept = party_change_years, linetype = "dashed", color = "black", size = 0.5) +
  theme_minimal() +
  labs(x = "Year", y = "Firearm Homicide Rate", linetype = "Quantile (Total Laws)", shape = "Quantile (Total Laws)") +
  scale_x_continuous(breaks = c(1991, seq(1995, 2015, by = 5), 2019)) +
  scale_y_continuous(breaks = c(0, seq(2, 6, by = 2), homicide_y_max))

# Firearm Suicide Trend Plot
p3 <- ggplot(suicide_trends, aes(x = Year, y = Firearm_Suicides, group = Law_Group)) +
  geom_line(aes(linetype = Law_Group), color = "black", size = 1) +
  geom_point(aes(shape = Law_Group), color = "black", size = 2) +
  geom_vline(xintercept = party_change_years, linetype = "dashed", color = "black", size = 0.5) +
  theme_minimal() +
  labs(x = "Year", y = "Firearm Suicide Rate", linetype = NULL, shape = NULL) +
  scale_x_continuous(breaks = c(1991, seq(1995, 2015, by = 5), 2019)) +
  scale_y_continuous(breaks = c(0, seq(2, 10, by = 4), suicide_y_max)) +
  theme(legend.position = "none")

# Combine all plots
combined <- (p1 / p2 / p3) + plot_layout(guides = "collect")

ggsave("Figures/figure2.png", plot = combined, width = 10, height = 6, dpi = 300)

### US Firearm Law Map (2019 only) (Figure 1)
map_data_2019 <- summarytab %>% filter(Year == 2019)

us_states <- map_data("state")
colnames(us_states) <- c("long", "lat", "group", "order", "State", "subregion")
us_states$State <- str_to_title(us_states$State)

map_data_2019 <- us_states %>% left_join(map_data_2019, by = "State")

p4 <- ggplot(map_data_2019, aes(x = long, y = lat, group = group, fill = `Law Total`)) +
  geom_polygon(color = "white") +
  scale_fill_gradient(low = "lightgrey", high = "black", name = "Firearm Law Count", 
                      breaks = c(1, seq(30, 90, by = 30), 107)) +
  theme_minimal() +
  labs(title = NULL, x = NULL, y = NULL) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid = element_blank()) +
  coord_map()

ggsave("Figures/figure1.pdf", plot = p4, width = 10, height = 6, dpi = 300)

```

```{r}

### Running NEHA (Models 1-3)
## Runtime for section: ~3 hours

## NEHA is RAM intensive; the RAM use will scale with dataset size
## Models take 30-45 minutes each to run
## A higher core count will use a greater amount of RAM

# Run NEHA demographic model
covariates <- colnames(cascade_long_scale[6:12])
system.time(neha_results_dem <- neha(cascade_long_scale, node = 'State', time = 'Year', cascade = 'Policy_Name', 
                                     event = 'Policy_Adopted', covariates = covariates, ncore = 2))

neha_estimate_dem <- glmmTMB(Policy_Adopted ~ Population + White_Pop + Mean_Income + Unemployment_Rate + 
                             Firearm_Ownership + Firearm_Homicide_Rate + Firearm_Suicide_Rate + edges_combined + 
                             factor(State) + (1 | Policy_Name),
                             data = neha_results_dem$data_for_neha,
                             family = binomial)

summary(neha_estimate_dem)

# Run NEHA institution model
covariates <- colnames(cascade_long_scale[c(5, 13:17)])
system.time(neha_results_inst <- neha(cascade_long_scale, node = 'State', time = 'Year', cascade = 'Policy_Name',
                                      event = 'Policy_Adopted', covariates = covariates, ncore = 2))

neha_estimate_inst <- glmmTMB(Policy_Adopted ~ Law_Total + Citizen_Ideology + State_Ideology + Professionalism +
                              Female_Legislators + Neighbor_Adopt_Prop + edges_combined + factor(State) + (1 | Policy_Name),
                              data = neha_results_inst$data_for_neha, family = binomial)

summary(neha_estimate_inst)

# Run NEHA full model
covariates <- colnames(cascade_long_scale[5:17])
system.time(neha_results_full <- neha(cascade_long_scale, node = 'State', time = 'Year', cascade = 'Policy_Name', 
                                      event = 'Policy_Adopted', covariates = covariates, ncore = 1))

neha_results_full$data_for_neha$State <- as.factor(neha_results_full$data_for_neha$State) # This line fixes the ggpredict() factor error 


neha_estimate_full <- glmmTMB(
  Policy_Adopted ~ Law_Total + Population + White_Pop + Mean_Income + 
    Unemployment_Rate + Firearm_Ownership + Firearm_Homicide_Rate + 
    Firearm_Suicide_Rate + Citizen_Ideology + State_Ideology + 
    Professionalism + Female_Legislators + Neighbor_Adopt_Prop + 
    edges_combined + State + (1 | Policy_Name),
  data = neha_results_full$data_for_neha,
  family = binomial
)

summary(neha_estimate_full)



dem_ex <- texreg::extract(neha_estimate_dem)
inst_ex <- texreg::extract(neha_estimate_inst)
full_ex <- texreg::extract(neha_estimate_full)

map_names <- list(
  "(Intercept)" = "Intercept",
  "Population" = "Population Log.",
  "White_Pop" = "White Prop.",
  "Mean_Income" = "Mean Income",
  "Unemployment_Rate" = "Unemployment Rate",
  "Firearm_Ownership" = "Firearm Owner Prop.",
  "Firearm_Homicide_Rate" = "Firearm Homicide Rate",
  "Firearm_Suicide_Rate" = "Firearm Suicide Rate",
  "Citizen_Ideology" = "Citizen Ideology",
  "State_Ideology" = "State Ideology",
  "Professionalism" = "Professionalism",
  "Female_Legislators" = "Female Legislator Prop.",
  "Neighbor_Adopt_Prop" = "Neighbor Adopt Prop.",
  "Law_Total" = "Restrictive Law Total",
  "edges_combined" = "Edges Combined"
)

gof_names <- c("AIC", "Log Likelihood", "Number of Observations:", "Number of Groups:", "Var:")

# Produce model figure in paper
htmlreg(list(dem_ex, inst_ex, full_ex), file = "Figures/NEHA_models.html", custom.coef.map = map_names, caption = NA, 
        custom.header = list("Policy Adoption" = 1:3), caption.above = TRUE, custom.gof.names = gof_names, 
        stars = c(0.001, 0.01, 0.05, 0.1))

save.image(file = "Figures/models.RData")

# Model with lagged neighbor adoption (Appendix Model 1)
covariates <- colnames(cascade_long_scale_lag[5:17])
system.time(neha_results_full_lag <- neha(cascade_long_scale_lag, node = 'State', time = 'Year', cascade = 'Policy_Name',
                                          event = 'Policy_Adopted', covariates = covariates, ncore = 2))

neha_estimate_full_lag <- glmmTMB(Policy_Adopted ~ Law_Total + Population + White_Pop + Mean_Income + 
                                  Unemployment_Rate + Firearm_Ownership + Firearm_Homicide_Rate + 
                                  Firearm_Suicide_Rate + Citizen_Ideology + State_Ideology + 
                                  Professionalism + Female_Legislators + Neighbor_Adopt_Prop_Lag + edges_combined + 
                                  factor(State) + (1 | Policy_Name),
                                  data = neha_results_full_lag$data_for_neha,
                                  family = binomial)

summary(neha_estimate_full_lag)

full_ex_lag <- texreg::extract(neha_estimate_full_lag)

map_names <- list(
  "(Intercept)" = "Intercept",
  "Population" = "Population Log.",
  "White_Pop" = "White Prop.",
  "Mean_Income" = "Mean Income",
  "Unemployment_Rate" = "Unemployment Rate",
  "Firearm_Ownership" = "Firearm Owner Prop.",
  "Firearm_Homicide_Rate" = "Firearm Homicide Rate",
  "Firearm_Suicide_Rate" = "Firearm Suicide Rate",
  "Citizen_Ideology" = "Citizen Ideology",
  "State_Ideology" = "State Ideology",
  "Professionalism" = "Professionalism",
  "Female_Legislators" = "Female Legislator Prop.",
  "Neighbor_Adopt_Prop_Lag" = "Lagged Neighbor Adopt Prop.",
  "Law_Total" = "Restrictive Law Total",
  "edges_combined" = "Edges Combined"
)

gof_names <- c("AIC", "Log Likelihood", "Number of Observations:", "Number of Groups:", "Var:")

htmlreg(list(full_ex_lag), file = "Figures/Appendix_model_1.html", custom.coef.map = map_names, caption = NA, 
        custom.header = list("Policy Adoption" = 1), caption.above = TRUE, custom.gof.names = gof_names, 
        stars = c(0.001, 0.01, 0.05, 0.1))

```

```{r}

### NEHA Figure Visualization
## Runtime for section: Less than 10 seconds

# Extract Ties from NEHA
results <- as.data.frame(neha_results_full$edges)
results <- separate_wider_delim(results, cols = 1, delim = "_", names = c("from", "to"))
results$from <- str_replace_all(results$from, "\\.", " ")

# Use the pres_2020_dataset and assign colors based on ideology (2020 Presidential Election Results)
state_colors <- pres_2020_dataset %>%
  select(State = state, Trump_pct = trump_pct) %>%
  mutate(
    Color = case_when(
      Trump_pct <= 47.5 ~ col_numeric(
        palette = c("gray80"),
        domain = c(0, 47.5)
      )(Trump_pct),
      Trump_pct >= 52.5 ~ col_numeric(
        palette = c("gray20"),
        domain = c(52.5, 100)
      )(Trump_pct),
      TRUE ~ col_numeric(
        palette = c("gray50"),
        domain = c(47.5, 52.5)
      )(Trump_pct)
    )
  )


# Graph results
if(requireNamespace("igraph", quietly = TRUE)){
    library(igraph)
    g <- graph_from_data_frame(d = results[, 1:2])

    # Map colors (2020 Presidential Election results) to nodes
    V(g)$color <- state_colors$Color[match(V(g)$name, state_colors$State)]

    # Calculate the degree of each node (number of ties)
    node_degree <- degree(g, mode = "out")
    node_degree <- node_degree * 3

    # Adjust node sizes: scale by degree with a minimum size of 3
    V(g)$size <- pmax(node_degree, 3) * 2 # Use pmax to ensure a minimum size of 3

    # Use Fruchterman-Reingold layout
    layout <- layout_with_fr(g)
    
    # Save the plot as a PNG file
    png("Figures/figure3.png", width = 2400, height = 1800, res = 300)

    # Plot the graph with node size proportional to the degree
    plot(g, layout = layout,
         edge.arrow.size = 0.3,
         vertex.color = V(g)$color, # Node color is political affiliation of state (2020 Presidential Election Results)
         vertex.label.color = "black",
         vertex.label.cex = 0.8,
         vertex.label.dist = 1.5)

    # Add a color legend
    legend("topright",  # Position of the legend
           legend = c("Democrat", "Moderate", "Republican"),  # Labels
           col = c("gray80", "gray50", "gray20"),  # Colors
           pch = 19,  # Use solid circles
           pt.cex = 2)  # Size of the points in the legend

    dev.off()
}

```

```{r}

### Logistic Regression Visualization
## Runtime for section: Less than 2 minutes

pred <- ggpredict(neha_estimate_full, terms = "Firearm_Suicide_Rate [all]", bias_correction = TRUE)

# Plot Suicide
plot <- ggplot(pred, aes(x = x, y = predicted)) +
  geom_line(color = "black", size = 1.0) + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, fill = "#676767") + 
  labs(
    x = "Firearm Suicide Rate",
    y = "Predicted Outcome"
  ) +
  theme_minimal(base_size = 12)

ggsave("Figures/figure4.pdf", plot = plot, width = 6, height = 4)

pred <- ggpredict(neha_estimate_full, terms = "Firearm_Homicide_Rate [all]", bias_correction = TRUE)

# Plot Homicide
plot <- ggplot(pred, aes(x = x, y = predicted)) +
  geom_line(color = "black", size = 1.0) + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, fill = "#676767") + 
  labs(
    x = "Firearm Homicide Rate",
    y = "Predicted Outcome"
  ) +
  theme_minimal(base_size = 12)

ggsave("Figures/figure5.pdf", plot = plot, width = 6, height = 4)

```